do "E:\yungyu\lottery_baby\do\set_environment.do"

* ---- Session setup for Table E1 ----
clear
set more off

* Covariate lists shared by every regression in this table.
* control1: winner characteristics and pre-period (suffix _pre1) outcomes.
global control1 "i.pers_gender i.houhld_resid_cd pers_income_pre1 pers_asset_pre1 pers_earnings_pre1 pers_labor_pre1 married_pre1 totbaby_pre1"
* control2: pre-period number of babies (already listed in control1 as well).
global control2 "totbaby_pre1"
* control3: wildcard pattern; matches whatever variables fit it in the loaded data.
global control3 "pre*redeem*"

* Remove stale outreg2 output so the append calls below start from an empty table.
* capture keeps the script running when the files do not exist yet.
foreach ext in txt xls {
	capture erase "$table/temp/TabE1.`ext'"
}

* Columns 50K and 2K: the main specification re-estimated on the samples
* built with alternative prize cutoffs.
foreach cut in 50 2 {
	use "$wdata\project_lottery_fertility_b3a6_first_`cut'k.dta", clear

	* Discard observations with prize10M above 5.
	drop if prize10M > 5

	* Baseline trend: change in mean totbaby for the current == 0 group between
	* event time -1 and event time 6, measured before event time is shifted.
	* NOTE(review): norm presumably resolves to norm_year through variable
	* abbreviation - confirm no separate variable called norm exists.
	summarize totbaby if current == 0 & norm == -1
	local mean_start = r(mean)
	summarize totbaby if current == 0 & norm == 6
	local mean_end = r(mean)
	local bt = `mean_end' - `mean_start'

	* Shift event time by 3; presumably so that the factor-variable term
	* i.norm only sees non-negative levels - confirm.
	replace norm_year = norm_year + 3

	reghdfe totbaby currentxprizexp* prize10M c.prize10M#i.norm c.prize10M#1.current i.age $control1 $control2 $control3, ///
		absorb(norm#current year) vce(cluster houseid)
	outreg2 using "$table/temp/TabE1.xls", append bdec(3) sdec(3) nocons ///
		keep(currentxprize*) ctitle(`cut'K) addtext(Baseline Trend, `bt')

}

* Columns GenderAgeFE and WinnerFE: 5k-cutoff sample with richer fixed effects.
use "$wdata\project_lottery_fertility_b3a6_first_5k.dta", clear

* Discard observations with prize10M above 5.
drop if prize10M > 5

* Baseline trend: change in mean totbaby for the current == 0 group between
* event time -1 and event time 6, measured before event time is shifted.
summarize totbaby if current == 0 & norm == -1
local mean_start = r(mean)
summarize totbaby if current == 0 & norm == 6
local mean_end = r(mean)
local bt = `mean_end' - `mean_start'

* Age in the observation year for the respondent and for the spouse.
gen pers_age = year - pers_born_year
gen spouse_age = year - spouse_born_year

* Sex-specific ages: when pers_gender is 1 the respondent supplies male_age and
* the spouse supplies female_age; when it is 2 the roles swap.
* Unknown ages are pooled into the single category 999 so that those rows
* are not lost when the ages are absorbed as fixed effects.
gen male_age = cond(pers_gender == 1, pers_age, cond(pers_gender == 2, spouse_age, .))
replace male_age = 999 if missing(male_age)
gen female_age = cond(pers_gender == 2, pers_age, cond(pers_gender == 1, spouse_age, .))
replace female_age = 999 if missing(female_age)

* Shift event time by 3 before it is used as a factor variable.
replace norm_year = norm_year + 3

* GenderAgeFE: i.age is replaced by absorbed male_age and female_age effects.
reghdfe totbaby currentxprizexp* prize10M c.prize10M#i.norm c.prize10M#1.current $control1 $control2 $control3, ///
	absorb(norm#current year male_age female_age) vce(cluster houseid)
outreg2 using "$table/temp/TabE1.xls", append bdec(3) sdec(3) nocons ///
	keep(currentxprize*) ctitle(GenderAgeFE) addtext(Baseline Trend, `bt')

* WinnerFE: household fixed effects absorb the time-invariant controls,
* so the control globals are omitted from this specification.
reghdfe totbaby currentxprizexp* prize10M c.prize10M#i.norm c.prize10M#1.current, ///
	absorb(norm#current year age houseid) vce(cluster houseid)
outreg2 using "$table/temp/TabE1.xls", append bdec(3) sdec(3) nocons ///
	keep(currentxprize*) ctitle(WinnerFE) addtext(Baseline Trend, `bt')


* Column SingleEvent: main specification on the single_5k sample file.
use "$wdata\project_lottery_fertility_b3a6_single_5k.dta", clear

* Discard observations with prize10M above 5.
drop if prize10M > 5

* Baseline trend: change in mean totbaby for the current == 0 group between
* event time -1 and event time 6, measured before event time is shifted.
summarize totbaby if current == 0 & norm == -1
local mean_start = r(mean)
summarize totbaby if current == 0 & norm == 6
local mean_end = r(mean)
local bt = `mean_end' - `mean_start'

* Shift event time by 3 before it is used as a factor variable.
replace norm_year = norm_year + 3

reghdfe totbaby currentxprizexp* prize10M c.prize10M#i.norm c.prize10M#1.current i.age $control1 $control2 $control3, ///
	absorb(norm#current year) vce(cluster houseid)
outreg2 using "$table/temp/TabE1.xls", append bdec(3) sdec(3) nocons ///
	keep(currentxprize*) ctitle(SingleEvent) addtext(Baseline Trend, `bt')


*****************************************************************************	

* Column Weighted: main specification with weights merged in from
* reweight_earn_asset.dta, matched on age at win, gender, earnings band,
* asset band and marital status.
use "$wdata\project_lottery_fertility_b3a6_first_5k.dta", clear

* Discard observations with prize10M above 5.
drop if prize10M > 5

* Asset bands (pers_asset_grp):
*   0: below 0          1: exactly 0        2: (0, 100]
*   3: (100, 500]       4: (500, 1000]      5: (1000, 3000]
*   6: (3000, 5000]     7: (5000, 10000]    8: above 10000
* The bands are assigned as a cascade: each pass overwrites the band of every
* observation above the next cutoff, so the last cutoff exceeded wins.
* NOTE(review): the original comments imply units of 1,000 (100 = 100,000),
* and they labelled bands 5 and 6 as 1-2 million and 2-5 million although the
* thresholds are 1000-3000 and 3000-5000 - confirm which is intended.
* NOTE(review): a missing pers_asset compares as larger than any number, so
* it lands in band 8 - confirm this matches how the weight file was built.
gen pers_asset_grp = 0 if pers_asset < 0
replace pers_asset_grp = 1 if pers_asset == 0
local band = 2
foreach cutoff in 0 100 500 1000 3000 5000 10000 {
	replace pers_asset_grp = `band' if pers_asset > `cutoff'
	local ++band
}
tab pers_asset_grp

* Earnings bands (pers_earnings_grp):
*   1: 0 or below       2: (0, 100]         3: (100, 200]
*   4: (200, 300]       5: (300, 500]       6: (500, 800]
*   7: (800, 1000]      8: (1000, 1500]     9: above 1500
* NOTE(review): a missing pers_earnings lands in band 9 for the same reason
* as above.
gen pers_earnings_grp = 1 if pers_earnings <= 0
local band = 2
foreach cutoff in 0 100 200 300 500 800 1000 1500 {
	replace pers_earnings_grp = `band' if pers_earnings > `cutoff'
	local ++band
}
tab pers_earnings_grp


* Attach the weights; rows that exist only in the weight file are dropped.
* The merge indicator is kept in the data.
merge m:1 win_age pers_gender pers_earnings_grp pers_asset_grp married using "$wdata\reweight_earn_asset.dta"
drop if _m == 2

* Make the weight constant within household: keep only the weight observed at
* event time -1 and spread it to every year of the same household.
bysort houseid: gen weight_pre1 = weight if norm_year == -1
order weight weight_pre1, after(norm_year)
drop weight
bysort houseid: egen weight = sum(weight_pre1)
drop weight_pre1

* Shift event time by 3 before it is used as a factor variable.
replace norm_year = norm_year + 3

* Population-weighted baseline trend (single age). Event time is already
* shifted here, so 2 and 9 correspond to -1 and 6 on the original scale.
summarize totbaby [aweight=weight] if current == 0 & norm == 2
local mean_start = r(mean)
summarize totbaby [aweight=weight] if current == 0 & norm == 9
local mean_end = r(mean)
local bt = `mean_end' - `mean_start'
display `bt'

reghdfe totbaby currentxprizexp* prize10M c.prize10M#i.norm c.prize10M#1.current i.age $control1 $control2 $control3 [aweight=weight], ///
	absorb(norm#current year) vce(cluster houseid)
outreg2 using "$table/temp/TabE1.xls", append bdec(3) sdec(3) nocons ///
	keep(currentxprize*) ctitle(Weighted) addtext(Baseline Trend, `bt')


** Future to current
* Column FutureAgeMatchedCurrent: reweight the current == 0 households so that,
* within each win_year by win_age cell, their total frequency weight matches
* the number of current == 1 households as closely as the rules below allow.

use "$wdata\project_lottery_fertility_b3a6_first_5k.dta", clear

* One row per household: keep the event-time-0 record and the cell identifiers.
drop if prize10M > 5
keep if norm == 0
keep current houseid win_year win_age

* Random order within cell (fixed seed for reproducibility); id is the rank.
set seed 20240627
gen random = rnormal()
bysort win_year win_age current (random): gen id = _n

* select is the frequency weight; 0 means the household is not used.
gen select = 0
forvalues yr = 96/101 {
	dis "Cohort `yr'"
	forvalues age = 20/44 {
		dis "- Age `age'"
		dis "-- Number of Current"
		count if win_year == `yr' & win_age == `age' & current == 1
		local n_cur = r(N)
		dis "-- Number of Future"
		count if win_year == `yr' & win_age == `age' & current == 0
		local n_fut = r(N)

		* current == 0 households of this cell
		local cell "current == 0 & win_year == `yr' & win_age == `age'"

		if `n_fut' >= `n_cur' {
			* Enough future households: use the first n_cur of them once.
			dis "-- Future > Current"
			replace select = 1 if `cell' & id <= `n_cur'
		}
		else if `n_fut'*2 >= `n_cur' {
			* Between half and all of current: everyone once, and the
			* first n_cur - n_fut households twice.
			dis "-- 0.5*Current < Future < Current"
			replace select = 1 if `cell'
			replace select = 2 if `cell' & id <= `n_cur' - `n_fut'
		}
		else {
			* Fewer than half of current: everyone twice, and the first
			* n_cur - 2*n_fut households three times (weights cap at 3).
			dis "-- Future < 0.5*Current"
			replace select = 2 if `cell'
			replace select = 3 if `cell' & id <= `n_cur' - 2*`n_fut'
		}
	}
}

* Every current == 1 household enters once.
replace select = 1 if current == 1
tab win_age current [fweight=select]

drop if select == 0

* Bring back the full panel for the selected households.
joinby houseid using "$wdata\project_lottery_fertility_b3a6_first_5k.dta"


* Weighted baseline trend, measured before event time is shifted.
summarize totbaby [fweight=select] if current == 0 & norm == -1
local mean_start = r(mean)
summarize totbaby [fweight=select] if current == 0 & norm == 6
local mean_end = r(mean)
local bt = `mean_end' - `mean_start'

* Shift event time by 3 before it is used as a factor variable.
replace norm_year = norm_year + 3

reghdfe totbaby currentxprizexp* prize10M c.prize10M#i.norm c.prize10M#1.current i.age $control1 $control2 $control3 [fweight=select], ///
	absorb(norm#current year) vce(cluster houseid)
outreg2 using "$table/temp/TabE1.xls", append bdec(3) sdec(3) nocons ///
	keep(currentxprize*) ctitle(FutureAgeMatchedCurrent) addtext(Baseline Trend, `bt')

* Copy the accumulated outreg2 text output into the TabE1 sheet of the workbook.
import delimited using "$table/temp/TabE1.txt", clear
export excel "$table/Tables.xlsx", sheet("TabE1") sheetreplace

* ---- Cohort-by-cohort estimates on the full sample (point estimates) ----
clear
set more off

* Covariate lists for the cohort-level regressions.
* NOTE(review): control3 is "pre*prize" here but "pre*redeem*" in the
* regressions earlier in this file - confirm the difference is intended.
global control1 "i.pers_gender i.houhld_resid_cd pers_income_pre1 pers_asset_pre1 pers_earnings_pre1 pers_labor_pre1 married_pre1 totbaby_pre1"
global control2 "totbaby_pre1"
global control3 "pre*prize"

* Empty result table: 6 cohorts (win_year 96 to 101) times 9 parameters.
* id = 0 marks the full-sample run.
set obs 6
gen win_year = 95 + _n
expand 9
bysort win_year: gen parm = _n
foreach stat in coef se n {
	gen `stat' = .
}
gen id = 0

sort id win_year parm

save "$wdata\NonStagger_MainResult.dta", replace

use "$wdata\project_lottery_fertility_b3a6_first_5k.dta"

* Discard observations with prize10M above 5.
drop if prize10M > 5

* Shift event time by 3 before it is used as a factor variable.
replace norm = norm + 3

* One regression per cohort. The first nine entries of e(b) and the matching
* diagonal of e(V) are stored in locals.
* NOTE(review): this assumes the currentxprizexp* wildcard expands to exactly
* nine variables, so that positions 1 to 9 are those coefficients - confirm.
forvalues cohort = 96/101 {
	reghdfe totbaby currentxprizexp* prize10M c.prize10M#i.norm c.prize10M#1.current i.age $control1 $control2 $control3 if win_year == `cohort', ///
		absorb(norm#current year) vce(cluster houseid)

	local n`cohort' = e(N)
	matrix b = e(b)
	matrix V = e(V)

	forvalues k = 1/9 {
		local coef`cohort'`k' = b[1,`k']
		local se`cohort'`k' = sqrt(V[`k',`k'])
	}
}

* Write the stored estimates into the result table.
use "$wdata\NonStagger_MainResult.dta", clear

forvalues cohort = 96/101 {
	replace n = `n`cohort'' if win_year == `cohort' & id == 0
	forvalues k = 1/9 {
		replace coef = `coef`cohort'`k'' if win_year == `cohort' & parm == `k' & id == 0
		replace se = `se`cohort'`k'' if win_year == `cohort' & parm == `k' & id == 0
	}
}

save "$wdata\NonStagger_MainResult.dta", replace

* ---- Bootstrap of the cohort-by-cohort estimates (1,000 replications) ----
* Progress banner. The sample and cutoff are written out literally because the
* loop macros that used to carry them no longer exist at this point; the values
* match the data file used below (first_5k).
dis in red "Sample: first; Cutoff: 5k"
dis "Date: `c(current_date)', Time: `c(current_time)'"

* Empty result table: 6 cohorts times 9 parameters times 1,000 replications.
* id indexes the bootstrap replication.
qui{
	clear
	set obs 6 //N of cohort
	gen win_year = 95 + [_n]
	expand 9
	bysort win_year: gen parm = _n
	gen coef = .
	gen se = .
	gen n = .
	expand 1000
	bysort win_year parm: gen id = _n

	sort id win_year parm

	save "$wdata\NonStagger.dta", replace
}

forv a = 1(1)1000{ //Bootstrap for 1,000 times
	dis "-- Bootstrap `a'..."
	qui{
	* Replication-specific seed so that each draw can be reproduced on its own.
	set seed `a'

	* use takes the option clear; the option replace is not allowed here and
	* would stop the loop on its first iteration.
	use "$wdata\project_lottery_fertility_b3a6_first_5k.dta" , clear

	drop if prize10M > 5

	* Resample households, not household-years: reduce to one row per
	* household, draw with replacement within win_year by current strata,
	* then bring back the full panel of every drawn household.
	keep if norm == 0
	keep win_year current houseid 
	bsample, str(win_year current)
	
	joinby houseid using "$wdata\project_lottery_fertility_b3a6_first_5k.dta"
	
	* Shift event time by 3 before it is used as a factor variable.
	replace norm = norm + 3
	
	* One regression per cohort; the first nine coefficients and their
	* standard errors are stored in locals.
	* NOTE(review): assumes positions 1 to 9 of e(b) are the
	* currentxprizexp* coefficients - confirm.
	forv c = 96(1)101{
		reghdfe totbaby currentxprizexp* prize10M c.prize10M#i.norm c.prize10M#1.current i.age $control1 $control2 $control3 if win_year == `c', a(norm#current year) cl(houseid)
	
		local n`c' = e(N)
		matrix b = e(b)
		matrix V = e(V)
		
		forv i = 1(1)9{
			local coef`c'`i' = b[1,`i']
			local se`c'`i' =  sqrt(V[`i',`i'])
		}			
	}

	* Write this replication into its rows of the result table and save, so
	* that completed replications survive an interrupted run.
	use "$wdata\NonStagger.dta", clear
	
	forv c = 96(1)101{
		replace n = `n`c'' if win_year == `c' & id == `a'
		forv i = 1(1)9{
			replace coef = `coef`c'`i'' if win_year == `c' & parm == `i' & id == `a'
			replace se = `se`c'`i'' if win_year == `c' & parm == `i' & id == `a'
		}
	}
	
	save "$wdata\NonStagger.dta", replace
	}
}

* ---- Combine cohort estimates into one table column ----
* Point estimate: sample-size-weighted mean of the cohort coefficients.
use "$wdata\NonStagger_MainResult.dta", clear
replace coef = coef / 2 //The estimate was based on 10M prize, adjusted to 5M

collapse (mean)coef [fw=n], by(parm)

save "$wdata\NonStagger_Estimate.dta", replace

* Standard error: the same weighted mean is computed within every bootstrap
* replication, and its standard deviation across replications is the s.e.
use "$wdata\NonStagger.dta", clear
replace coef = coef / 2 //The estimate was based on 10M prize, adjusted to 5M

collapse (mean)coef [fw=n], by(parm id)
collapse (sd)se=coef, by(parm)

merge 1:1 parm using "$wdata\NonStagger_Estimate.dta", nogen

* Two-sided p-value. ttail() returns the upper-tail probability, so the
* absolute value of t is required; without it a negative t gives p above 1
* and a negative estimate could never be marked significant.
gen t = coef/se
gen p = 2*ttail(1000, abs(t))

save "$wdata\NonStagger_Estimate.dta", replace

format _all %9.3f

* Stack estimate (type 1), standard error (type 2) and p-value (type 3)
* vertically for each parameter.
rename se v2
rename coef v1
rename p v3

keep parm v*

reshape long v, i(parm) j(type)

* Convert to text with three decimals; standard errors go in brackets.
tostring v, replace format(%9.3f) force
replace v = "[" + v + "]" if type == 2 

* Significance stars on the estimate row. The p-value sits two rows below the
* estimate. Both sides of the comparison are strings in the same 0.000 format,
* so the string comparison orders them like numbers. A missing p-value is the
* string ".", which would sort below "0.100", so it is excluded explicitly.
replace v = v + "*" if type == 1 & v[_n+2] != "." & v[_n+2] <= "0.100"
replace v = v + "*" if type == 1 & v[_n+2] != "." & v[_n+2] <= "0.050"
replace v = v + "*" if type == 1 & v[_n+2] != "." & v[_n+2] <= "0.010"

drop if type == 3

* Add four empty rows and sort them to the top (row 0), presumably so that
* the column lines up with the header rows of the outreg2 table - confirm.
gen row = _n
loc N = _N + 4
set obs `N'

recode row . = 0
sort row
keep v

save "$wdata\TabE1_col8_NonStagger_Estimate.dta",replace
